In [1]:
import pandas as pd
In [3]:
# Load the combined name-count records from the working directory.
# The displayed frame has one row per (Name, Sex, Year) with its count in `Cnt`.
df = pd.read_csv('result.csv')
df
Out[3]:
Name Sex Cnt Year
0 Mary F 7065 1880
1 Anna F 2604 1880
2 Emma F 2003 1880
3 Elizabeth F 1939 1880
4 Minnie F 1746 1880
... ... ... ... ...
1989396 Zyheem M 5 2019
1989397 Zykel M 5 2019
1989398 Zyking M 5 2019
1989399 Zyn M 5 2019
1989400 Zyran M 5 2019

1989401 rows × 4 columns

In [8]:
# Total count per (Year, Sex).
# `Cnt` is selected explicitly: since pandas 2.0 `GroupBy.sum()` no longer drops
# non-numeric columns by default, so the string column `Name` would otherwise be
# concatenated into the result. The double brackets keep a one-column DataFrame.
result = df.groupby(['Year', 'Sex'])[['Cnt']].sum()
In [6]:
# Read the raw 2019 file (it has no header row, so column names are supplied
# here) and display its overall count as a reference figure.
df2019 = pd.read_csv(
    'yob2019.txt',
    names=['Name', 'Sex', 'Cnt'],
)
df2019['Cnt'].sum()
Out[6]:
3445321
In [7]:
# Cross-check: the 2019 total (F + M) taken from the aggregated frame should
# equal the sum of the raw 2019 file. It is computed from `result` rather than
# typed in by hand so the check stays valid if the data changes.
result.xs(2019, level='Year')['Cnt'].sum()
Out[7]:
3445321
In [9]:
# Inspect the aggregated totals: one row per (Year, Sex), single column `Cnt`.
result
Out[9]:
Cnt
Year Sex
1880 F 90994
M 110490
1881 F 91953
M 100743
1882 F 107847
... ... ...
2017 M 1845472
2018 F 1694640
M 1809166
2019 F 1665373
M 1779948

280 rows × 1 columns

In [11]:
# Year labels of the row MultiIndex, addressed by level name instead of position.
result.index.get_level_values('Year')
Out[11]:
Int64Index([1880, 1880, 1881, 1881, 1882, 1882, 1883, 1883, 1884, 1884,
            ...
            2015, 2015, 2016, 2016, 2017, 2017, 2018, 2018, 2019, 2019],
           dtype='int64', name='Year', length=280)
In [12]:
# Sex labels of the row MultiIndex, addressed by level name instead of position.
result.index.get_level_values('Sex')
Out[12]:
Index(['F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M',
       ...
       'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M'],
      dtype='object', name='Sex', length=280)
In [14]:
# Keep only the male rows, using a boolean mask built from the `Sex` index level.
result[result.index.get_level_values('Sex') == 'M']
Out[14]:
Cnt
Year Sex
1880 M 110490
1881 M 100743
1882 M 113686
1883 M 104625
1884 M 114442
... ... ...
2015 M 1913059
2016 M 1893471
2017 M 1845472
2018 M 1809166
2019 M 1779948

140 rows × 1 columns

In [17]:
# Rows for the years 1880..1890 inclusive (range's upper bound is exclusive).
result[result.index.get_level_values('Year').isin(range(1880, 1891))]
Out[17]:
Cnt
Year Sex
1880 F 90994
M 110490
1881 F 91953
M 100743
1882 F 107847
M 113686
1883 F 112319
M 104625
1884 F 129019
M 114442
1885 F 133055
M 107799
1886 F 144533
M 110784
1887 F 145982
M 101412
1888 F 178622
M 120851
1889 F 178366
M 110580
1890 F 190377
M 111025
In [18]:
# Male rows for 1880..1890 inclusive: AND of the two index-level masks.
result[
    result.index.get_level_values('Year').isin(range(1880, 1891))
    & (result.index.get_level_values('Sex') == 'M')
]
Out[18]:
Cnt
Year Sex
1880 M 110490
1881 M 100743
1882 M 113686
1883 M 104625
1884 M 114442
1885 M 107799
1886 M 110784
1887 M 101412
1888 M 120851
1889 M 110580
1890 M 111025
In [21]:
# Pivot the `Sex` index level into columns: one row per Year, with F and M
# side by side under `Cnt`.
result2 = result.unstack(level='Sex')
In [22]:
# Inspect the wide frame: index is Year, columns are ('Cnt', 'F') and ('Cnt', 'M').
result2
Out[22]:
Cnt
Sex F M
Year
1880 90994 110490
1881 91953 100743
1882 107847 113686
1883 112319 104625
1884 129019 114442
... ... ...
2015 1781725 1913059
2016 1767902 1893471
2017 1721550 1845472
2018 1694640 1809166
2019 1665373 1779948

140 rows × 2 columns

In [25]:
# Male count for 1880, looked up by label in a single `.loc` call.
# The previous form indexed the row with a bare integer (`[1]`), which relied on
# positional fallback for a label-indexed Series; that fallback is deprecated
# since pandas 2.1 and also depends on the column order.
result2.loc[1880, ('Cnt', 'M')]
Out[25]:
110490
In [29]:
# Female count for 2019: row label and full column label in one `.loc` lookup.
result2.loc[2019, ('Cnt', 'F')]
Out[29]:
1665373
In [31]:
# Drop the outer `Cnt` column level, leaving plain F / M columns per Year.
result2['Cnt']
Out[31]:
Sex F M
Year
1880 90994 110490
1881 91953 100743
1882 107847 113686
1883 112319 104625
1884 129019 114442
... ... ...
2015 1781725 1913059
2016 1767902 1893471
2017 1721550 1845472
2018 1694640 1809166
2019 1665373 1779948

140 rows × 2 columns

In [32]:
# Re-display the wide Year x Sex frame before plotting it.
result2
Out[32]:
Cnt
Sex F M
Year
1880 90994 110490
1881 91953 100743
1882 107847 113686
1883 112319 104625
1884 129019 114442
... ... ...
2015 1781725 1913059
2016 1767902 1893471
2017 1721550 1845472
2018 1694640 1809166
2019 1665373 1779948

140 rows × 2 columns

In [33]:
# Quick-look line chart of both columns against Year, using the pandas defaults.
result2.plot.line()
Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x21903b947c0>
In [34]:
import matplotlib.pyplot as plt
In [42]:
# Yearly totals by sex, drawn on an explicitly owned figure/axes so the cell
# does not depend on pyplot's implicit "current figure" state.
fig, ax = plt.subplots(figsize=(10, 6))

# One line per column of result2['Cnt']. The label is attached to each line so
# the legend cannot drift out of sync with the column order.
for sex in result2['Cnt'].columns:
    ax.plot(result2.index, result2['Cnt'][sex], label=f'Cnt, {sex}')

# Show plain integers on the y axis instead of scientific notation.
ax.ticklabel_format(axis='y', style='plain')

# One tick every 5 years across the range actually present in the data.
ax.set_xticks(range(result2.index.min(), result2.index.max() + 1, 5))
ax.tick_params(axis='x', labelrotation=90)

ax.set_xlabel('Годы')
ax.set_ylabel('Кол-во имен')

ax.grid()
ax.legend()
# Optional: uncomment to write the figure to disk.
# fig.savefig('Count.png', dpi=100)
plt.show()
In [38]:
# Latest year present in the data (the plotting cell uses it as the x-tick upper bound).
result2.index.max()
Out[38]:
2019
In [ ]: